*****************************************************************************
**                      Replication do-file for article                    **
** Continuity Trumps? The impact of interviewer change on item nonresponse **
**                  authors: Kristin Hajek & Nina Schumann                 ** 
*****************************************************************************

********************
** 3) Nonresponse **
********************

* -1: don't know
* -2: refusal

** W1
** Wave 1 starts the pipeline: load the anchor data, reduce every item battery
** to one item, count nonresponse codes per respondent, and keep only the
** identifiers, respondent/interviewer variables and the new counters.

use "$w1data_v7\anchor1.dta", clear

drop if demodiff == 1

** Item batteries are meant to count as ONE question, so every item after the
** first is removed. (On a "don't know" / refusal all items of a battery
** carry -1 / -2.)
foreach battery in srs1i2-srs1i5 sd23i2-sd23i22 sd29i2-sd29i7 ///
		sdp2i2-sdp2i12 sdp9i2-sdp9i7 sdp10i2-sdp10i15 ///
		job5i2-job5i4 job6i2-job6i3 sex6i2-sex6i11 ///
		frt4i2-frt4i7 frt13i2-frt13i14 ///
		inc10i2-inc10i12 inc19i2-inc19i5 ///
		rtr6p2-rtr6p8 rtr13p2-rtr13p8 ///
		mig1i2-mig1i12 mig6i2-mig6i12 mig7i2-mig7i12 {
	drop `battery'
}
drop hcp3m hcp4m job16m job18m

* Variable ranges that make up the CAPI part (i.e. without CASI); the ranges
* rely on the variable order of the anchor file.
local capi_items val1i1-pa28 frt5-cps12

* Nonresponse per person, CAPI only:
*   overall (codes -1, -2, -7), don't know (-1), refusal (-2)
* Code -7 is not explained in this file - TODO confirm its meaning; it enters
* the overall count but neither of the two splits.
egen missings_w1_CAPI   = anycount(`capi_items'), values(-1, -2, -7)
egen missings_w1_CAPI_1 = anycount(`capi_items'), values(-1)
egen missings_w1_CAPI_2 = anycount(`capi_items'), values(-2)

* Nonresponse on monthly household income (inc13): overall / don't know / refusal
tab inc13, m
egen missings_w1_Eink   = anycount(inc13), values(-1, -2)
egen missings_w1_Eink_1 = anycount(inc13), values(-1)
egen missings_w1_Eink_2 = anycount(inc13), values(-2)
* respondents with inc13 coded -3 or -10 get a missing flag instead of 0
foreach flag of varlist missings_w1_Eink missings_w1_Eink_1 missings_w1_Eink_2 {
	replace `flag' = . if inlist(inc13, -3, -10)
}

* Number of questions asked = non-missing items minus items coded -3
* (-3 is presumably "does not apply" - TODO confirm against the codebook)
egen varzahl_w1_CAPI = rownonmiss(`capi_items')	// all CAPI questions
egen varzahl_w1_CAPI_miss = anycount(`capi_items'), values(-3)
replace varzahl_w1_CAPI = varzahl_w1_CAPI - varzahl_w1_CAPI_miss
tab varzahl_w1_CAPI, m  // range: 121-432

* Same denominator, but only for the items up to the income question
local income_items val1i1-inc13
egen varzahl_w1 = rownonmiss(`income_items')
egen varzahl_w1_miss = anycount(`income_items'), values(-3)
replace varzahl_w1 = varzahl_w1 - varzahl_w1_miss
tab varzahl_w1, m

keep id wave demodiff sex_gen doby_gen age cohort intsex intage intid intcont intdur varzahl* missings_*


*****************************************************************************

** W2
** Wave 2 is appended to the reduced wave-1 file. The wave-2 counters below are
** computed for every row in memory; only the rows with wave==2 are used when
** the waves are combined further down in this file.

append using "$w2data_v6\anchor2.dta"

drop if demodiff == 1

* delete questions for which it was not possible to "not" answer
* (kept as ONE drop command: the m1 pattern also matches the m10 names, so the
*  patterns have to be expanded together)
drop ehc*m1* ehc*m2* ehc*m3* ehc*m4* ehc*m5* ehc*m6* ehc*m7* ehc*m8* ehc*m9* ehc*m10* ehcid

** delete item batteries, so they only count as one question
** (on a "don't know" / refusal all items of a battery carry -1 / -2)
foreach battery in srs1i2-srs1i5 sd32i2-sd32i5 sd32i6-sd32i12 ///
		sdp2i2-sdp2i12 sdp9i2-sdp9i7 sdp10i2-sdp10i15 ///
		job5i2-job5i4 job6i2-job6i3 sex6i2-sex6i11 ///
		frt4i2-frt4i7 frt13i2-frt13i14 frt24i2-frt24i8 ///
		inc10i2-inc10i12 {
	drop `battery'
}
drop crn2k*i2
* batteries repeated per k-index: items 2 and up are removed for every k
forvalues i = 2/14 {
	drop crn12k*i`i'
}
foreach stem in crn13k crn14k {
	forvalues i = 2/13 {
		drop `stem'*i`i'
	}
}
forvalues p = 1/4 {
	drop igr51p`p'i2-igr51p`p'i6
}
drop hcp3m inc22i2
* forvalues replaces the out-of-date "for num" prefix; same drops, same order
forvalues i = 2/23 {
	drop ehc19i`i'
}

* Variable ranges that make up the CAPI part (i.e. without CASI); the ranges
* rely on the variable order of the anchor file.
local capi_items val2i1-pa28 bce2i1-cps5

* Nonresponse per person, CAPI only:
*   overall (codes -1, -2, -7), don't know (-1), refusal (-2)
* Code -7 is not explained in this file - TODO confirm its meaning; it enters
* the overall count but neither of the two splits.
egen missings_w2_CAPI   = anycount(`capi_items'), values(-1, -2, -7)
egen missings_w2_CAPI_1 = anycount(`capi_items'), values(-1)
egen missings_w2_CAPI_2 = anycount(`capi_items'), values(-2)

* Nonresponse on monthly household income (inc13): overall / don't know / refusal
tab inc13, m
egen missings_w2_Eink   = anycount(inc13), values(-1, -2)
egen missings_w2_Eink_1 = anycount(inc13), values(-1)
egen missings_w2_Eink_2 = anycount(inc13), values(-2)
* respondents with inc13 coded -3 or -10 get a missing flag instead of 0
foreach flag of varlist missings_w2_Eink missings_w2_Eink_1 missings_w2_Eink_2 {
	replace `flag' = . if inlist(inc13, -3, -10)
}

* Number of questions asked = non-missing items minus items coded -3
* (-3 is presumably "does not apply" - TODO confirm against the codebook)
egen varzahl_w2_CAPI = rownonmiss(`capi_items')	// all CAPI questions
egen varzahl_w2_CAPI_miss = anycount(`capi_items'), values(-3)
replace varzahl_w2_CAPI = varzahl_w2_CAPI - varzahl_w2_CAPI_miss
tab varzahl_w2_CAPI, m  // range: 101-547

* Same denominator, only until the income question, without EHC
local income_items val2i1-inc13
egen varzahl_w2 = rownonmiss(`income_items')
egen varzahl_w2_miss = anycount(`income_items'), values(-3)
replace varzahl_w2 = varzahl_w2 - varzahl_w2_miss
tab varzahl_w2 wave, m

keep id wave demodiff sex_gen doby_gen age cohort intsex intage intid intcont intdur varzahl* missings_*

*****************************************************************************

** W3
** Wave 3 is appended to the reduced file of the earlier waves. The wave-3
** counters below are computed for every row in memory; only the rows with
** wave==3 are used when the waves are combined further down in this file.

append using "$w3data_v5\anchor3.dta"

drop if demodiff == 1

* delete questions for which it was not possible to "not" answer
* (kept as ONE drop command: the m1 pattern also matches the m10 names, so the
*  patterns have to be expanded together)
drop ehc*m1* ehc*m2* ehc*m3* ehc*m4* ehc*m5* ehc*m6* ehc*m7* ehc*m8* ehc*m9* ehc*m10* ehcid

** delete item batteries, so they only count as one question
** (on a "don't know" / refusal all items of a battery carry -1 / -2)
foreach battery in srs1i2-srs1i5 sd32i2-sd32i5 sd32i6-sd32i12 ///
		sdp2i2-sdp2i12 sdp9i2-sdp9i7 {
	drop `battery'
}
* sdp10 items are dropped by name, one at a time, rather than as a range
forvalues i = 2/22 {
	drop sdp10i`i'
}
foreach battery in job5i2-job5i4 job6i2-job6i3 sex6i2-sex6i11 ///
		frt4i2-frt4i7 inc10i2-inc10i12 {
	drop `battery'
}
drop crn2k*i2
* batteries repeated per k-index: items 2 and up are removed for every k
foreach stem in crn13k crn14k {
	forvalues i = 2/14 {
		drop `stem'*i`i'
	}
}
drop hcp3m inc22i2 hcp4m job16m job18m
foreach battery in igr76i2-igr76i7 igr80i2-igr80i7 ///
		igr73i2-igr73i12 igr77i2-igr77i12 ///
		igr82i2-igr82i12 igr85i2-igr85i12 ///
		rtr31i2-rtr31i9 rtr31i10 ///
		rtr35i2-rtr35i7 rtr35i8-rtr35i14 {
	drop `battery'
}
* forvalues replaces the out-of-date "for num" prefix; same drops, same order
forvalues i = 2/23 {
	drop ehc19i`i'
}

* Variable ranges that make up the CAPI part (i.e. without CASI); the ranges
* rely on the variable order of the anchor file.
local capi_items val1i1-pa28 frt5-cps5

* Nonresponse per person, CAPI only:
*   overall (codes -1, -2, -7), don't know (-1), refusal (-2)
* Code -7 is not explained in this file - TODO confirm its meaning; it enters
* the overall count but neither of the two splits.
egen missings_w3_CAPI   = anycount(`capi_items'), values(-1, -2, -7)
egen missings_w3_CAPI_1 = anycount(`capi_items'), values(-1)
egen missings_w3_CAPI_2 = anycount(`capi_items'), values(-2)

* Nonresponse on monthly household income (inc13): overall / don't know / refusal
tab inc13, m
egen missings_w3_Eink   = anycount(inc13), values(-1, -2)
egen missings_w3_Eink_1 = anycount(inc13), values(-1)
egen missings_w3_Eink_2 = anycount(inc13), values(-2)
* respondents with inc13 coded -3 or -10 get a missing flag instead of 0
foreach flag of varlist missings_w3_Eink missings_w3_Eink_1 missings_w3_Eink_2 {
	replace `flag' = . if inlist(inc13, -3, -10)
}

* Number of questions asked = non-missing items minus items coded -3
* (-3 is presumably "does not apply" - TODO confirm against the codebook)
egen varzahl_w3_CAPI = rownonmiss(`capi_items')	// all CAPI questions
egen varzahl_w3_CAPI_miss = anycount(`capi_items'), values(-3)
replace varzahl_w3_CAPI = varzahl_w3_CAPI - varzahl_w3_CAPI_miss
tab varzahl_w3_CAPI, m  // range: 93-624

* Same denominator, only until the income question, without EHC
local income_items val1i1-inc13
egen varzahl_w3 = rownonmiss(`income_items')
egen varzahl_w3_miss = anycount(`income_items'), values(-3)
replace varzahl_w3 = varzahl_w3 - varzahl_w3_miss
tab varzahl_w3 wave, m

keep id wave demodiff d0 sex_gen doby_gen age cohort intsex intage intid intcont intdur varzahl* missings_*

*****************************************************************************

** W4
** Wave 4 is appended to the reduced file of the earlier waves. The wave-4
** counters below are computed for every row in memory; only the rows with
** wave==4 are used when the waves are combined further down in this file.

append using "$w4data_v4\anchor4.dta"

drop if demodiff == 1

* delete questions for which it was not possible to "not" answer
* (kept as ONE drop command: the m1 pattern also matches the m10 names, so the
*  patterns have to be expanded together)
drop ehc*m1* ehc*m2* ehc*m3* ehc*m4* ehc*m5* ehc*m6* ehc*m7* ehc*m8* ehc*m9* ehc*m10* ehcid

** delete item batteries, so they only count as one question
** (on a "don't know" / refusal all items of a battery carry -1 / -2)
foreach battery in srs1i2-srs1i5 sd32i2-sd32i5 sd32i6-sd32i12 sd32i13 ///
		sdp2i2-sdp2i12 sdp9i2-sdp9i7 {
	drop `battery'
}
* sdp10 items are dropped by name, one at a time, rather than as a range
forvalues i = 2/22 {
	drop sdp10i`i'
}
foreach battery in job5i2-job5i5 job6i2-job6i4 sex6i2-sex6i11 ///
		frt4i2-frt4i7 frt13i2-frt13i14 inc10i2-inc10i12 {
	drop `battery'
}
drop crn2k*i2 crn2k*i3
* batteries repeated per k-index: items 2 and up are removed for every k
foreach stem in crn13k crn14k {
	forvalues i = 2/14 {
		drop `stem'*i`i'
	}
}
forvalues p = 1/4 {
	drop igr51p`p'i2-igr51p`p'i6
}
drop hcp3m inc22i2
foreach battery in igr82i2-igr82i12 igr85i2-igr85i12 {
	drop `battery'
}
forvalues i = 2/8 {
	drop crn7k*i`i'
}
drop inc29i2 inc29i3 hc26h2m
* forvalues replaces the out-of-date "for num" prefix; same drops, same order
forvalues i = 2/23 {
	drop ehc19i`i'
}
* NOTE(review): capture drop with several names is all-or-nothing - if one of
* the five names is absent the command fails silently and none is dropped.
* Items i2-i8 of crn7k are already gone at this point, so only i9-i11 can be
* affected; confirm that they are removed as intended.
forvalues i = 2/11 {
	capture drop crn7k1i`i' crn7k2i`i' crn7k3i`i' crn7k4i`i' crn7k5i`i'
}
forvalues i = 2/11 {
	capture drop crn7k6i`i' crn7k7i`i' crn7k8i`i' crn7k9i`i' crn7k10i`i'
}

* Variable ranges that make up the CAPI part (i.e. without CASI); the ranges
* rely on the variable order of the anchor file.
* NOTE(review): wave 2 starts this range at val2i1, this wave at val2i2 -
* confirm that val2i1 is absent or deliberately left out here.
local capi_items val2i2-pa28 bce2i1-cps5

* Nonresponse per person, CAPI only:
*   overall (codes -1, -2, -7), don't know (-1), refusal (-2)
* Code -7 is not explained in this file - TODO confirm its meaning; it enters
* the overall count but neither of the two splits.
egen missings_w4_CAPI   = anycount(`capi_items'), values(-1, -2, -7)
egen missings_w4_CAPI_1 = anycount(`capi_items'), values(-1)
egen missings_w4_CAPI_2 = anycount(`capi_items'), values(-2)

* Nonresponse on monthly household income (inc13): overall / don't know / refusal
tab inc13, m
egen missings_w4_Eink   = anycount(inc13), values(-1, -2)
egen missings_w4_Eink_1 = anycount(inc13), values(-1)
egen missings_w4_Eink_2 = anycount(inc13), values(-2)
* rows with inc13 system missing or coded -10 get a missing flag instead of 0
* NOTE(review): waves 1-3 blank out codes -3 and -10 at this step; confirm
* that -3 cannot occur in inc13 in this wave.
foreach flag of varlist missings_w4_Eink missings_w4_Eink_1 missings_w4_Eink_2 {
	replace `flag' = . if inlist(inc13, ., -10)
}

* Number of questions asked = non-missing items minus items coded -3
* (-3 is presumably "does not apply" - TODO confirm against the codebook)
egen varzahl_w4_CAPI = rownonmiss(`capi_items')	// all CAPI questions
egen varzahl_w4_CAPI_miss = anycount(`capi_items'), values(-3)
replace varzahl_w4_CAPI = varzahl_w4_CAPI - varzahl_w4_CAPI_miss
tab varzahl_w4_CAPI, m

* Same denominator, but only for the items up to the income question
local income_items val2i2-inc13
egen varzahl_w4 = rownonmiss(`income_items')
egen varzahl_w4_miss = anycount(`income_items'), values(-3)
replace varzahl_w4 = varzahl_w4 - varzahl_w4_miss
tab varzahl_w4 wave, m

keep id wave demodiff d0 sex_gen doby_gen age cohort intsex intage intid intcont intdur varzahl* missings_*

*****************************************************************************

** W5
** Wave 5 is appended to the reduced file of the earlier waves. The wave-5
** counters below are computed for every row in memory; only the rows with
** wave==5 are used when the waves are combined further down in this file.

append using "$w5data_v3\anchor5.dta"

drop if demodiff == 1

* delete questions for which it was not possible to "not" answer
* (kept as ONE drop command: the m1 pattern also matches the m10 names, so the
*  patterns have to be expanded together)
drop ehc*m1* ehc*m2* ehc*m3* ehc*m4* ehc*m5* ehc*m6* ehc*m7* ehc*m8* ehc*m9* ehc*m10* ehcid

** delete item batteries, so they only count as one question
** (on a "don't know" / refusal all items of a battery carry -1 / -2)
foreach battery in srs1i2-srs1i5 sd32i2-sd32i5 sd32i6-sd32i12 sd32i13 ///
		sdp2i2-sdp2i12 sdp9i2-sdp9i7 {
	drop `battery'
}
* sdp10 items are dropped by name, one at a time, rather than as a range
forvalues i = 2/22 {
	drop sdp10i`i'
}
foreach battery in job5i2-job5i5 job6i2-job6i4 sex6i2-sex6i11 ///
		frt4i2-frt4i7 frt13i2-frt13i14 inc10i2-inc10i12 {
	drop `battery'
}
drop crn2k*i2 crn2k*i3
* batteries repeated per k-index: items 2 and up are removed for every k
foreach stem in crn12k crn13k crn14k {
	forvalues i = 2/14 {
		drop `stem'*i`i'
	}
}
drop hcp3m inc22i2
drop inc19i2-inc19i5
drop hcp4m job16m job18m
foreach battery in igr82i2-igr82i12 igr85i2-igr85i12 {
	drop `battery'
}
forvalues i = 2/8 {
	drop crn7k*i`i'
}
drop inc29i2 inc29i3 hc26h2m
foreach battery in sin8i2-sin8i4 hlt11i2-hlt11i3 {
	drop `battery'
}
* battery repeated per p-index: items 2 to 22 are removed for every p
forvalues i = 2/22 {
	drop sib9p*i`i'
}
* forvalues replaces the out-of-date "for num" prefix; same drops, same order
forvalues i = 2/23 {
	drop ehc19i`i'
}
* NOTE(review): capture drop with several names is all-or-nothing - if one of
* the five names is absent the command fails silently and none is dropped.
* Items i2-i8 of crn7k are already gone at this point, so only i9-i11 can be
* affected; confirm that they are removed as intended.
forvalues i = 2/11 {
	capture drop crn7k1i`i' crn7k2i`i' crn7k3i`i' crn7k4i`i' crn7k5i`i'
}
forvalues i = 2/11 {
	capture drop crn7k6i`i' crn7k7i`i' crn7k8i`i' crn7k9i`i' crn7k10i`i'
}

* Variable ranges that make up the CAPI part; the ranges rely on the variable
* order of the anchor file.
local capi_items val1i1-pa28 frt5-cps5

* CAPI missings per person:
*   overall (codes -1, -2, -7), don't know (-1), refusal (-2)
* Code -7 is not explained in this file - TODO confirm its meaning; it enters
* the overall count but neither of the two splits.
egen missings_w5_CAPI   = anycount(`capi_items'), values(-1, -2, -7)
egen missings_w5_CAPI_1 = anycount(`capi_items'), values(-1)
egen missings_w5_CAPI_2 = anycount(`capi_items'), values(-2)

* Nonresponse on monthly household income (inc13): overall / don't know / refusal
tab inc13
egen missings_w5_Eink   = anycount(inc13), values(-1, -2)
egen missings_w5_Eink_1 = anycount(inc13), values(-1)
egen missings_w5_Eink_2 = anycount(inc13), values(-2)
* rows with inc13 system missing or coded -10 get a missing flag instead of 0
* NOTE(review): waves 1-3 blank out codes -3 and -10 at this step; confirm
* that -3 cannot occur in inc13 in this wave.
foreach flag of varlist missings_w5_Eink missings_w5_Eink_1 missings_w5_Eink_2 {
	replace `flag' = . if inlist(inc13, ., -10)
}

* Number of questions asked = non-missing items minus items coded -3
* (-3 is presumably "does not apply" - TODO confirm against the codebook)
egen varzahl_w5_CAPI = rownonmiss(`capi_items')	// all CAPI questions
egen varzahl_w5_CAPI_miss = anycount(`capi_items'), values(-3)
replace varzahl_w5_CAPI = varzahl_w5_CAPI - varzahl_w5_CAPI_miss
tab varzahl_w5_CAPI, m

* Same denominator, but only for the items up to the income question
local income_items val1i1-inc13
egen varzahl_w5 = rownonmiss(`income_items')
egen varzahl_w5_miss = anycount(`income_items'), values(-3)
replace varzahl_w5 = varzahl_w5 - varzahl_w5_miss
tab varzahl_w5 wave, m

keep id wave demodiff d0 sex_gen doby_gen age cohort intsex intage intid intcont intdur varzahl* missings_*

*****************************************************************************

** W6
** Wave 6 is appended to the reduced file of the earlier waves. The wave-6
** counters below are computed for every row in memory; only the rows with
** wave==6 are used when the waves are combined further down in this file.

append using "$w6data_v2\anchor6.dta"

drop if demodiff == 1

* delete questions for which it was not possible to "not" answer
* (kept as ONE drop command: the m1 pattern also matches the m10 names, so the
*  patterns have to be expanded together)
drop ehc*m1* ehc*m2* ehc*m3* ehc*m4* ehc*m5* ehc*m6* ehc*m7* ehc*m8* ehc*m9* ehc*m10* ehcid

** delete item batteries, so they only count as one question
** (on a "don't know" / refusal all items of a battery carry -1 / -2)
foreach battery in srs1i2-srs1i5 sd32i2-sd32i5 sd32i6-sd32i12 sd32i13 ///
		sdp2i2-sdp2i12 sdp9i2-sdp9i7 {
	drop `battery'
}
* sdp10 items are dropped by name, one at a time, rather than as a range
forvalues i = 2/22 {
	drop sdp10i`i'
}
foreach battery in job5i2-job5i5 job6i2-job6i4 sex6i2-sex6i11 ///
		frt4i2-frt4i7 frt13i2-frt13i14 inc10i2-inc10i12 {
	drop `battery'
}
drop crn2k*i2 crn2k*i3
* batteries repeated per k-index: items 2 and up are removed for every k
foreach stem in crn13k crn14k {
	forvalues i = 2/14 {
		drop `stem'*i`i'
	}
}
drop hcp3m inc22i2
foreach battery in igr82i2-igr82i12 igr85i2-igr85i12 {
	drop `battery'
}
forvalues i = 2/8 {
	drop crn7k*i`i'
}
drop inc29i2 inc29i3 hc26h2m
drop sin8i2-sin8i4
* forvalues replaces the out-of-date "for num" prefix; same drops, same order
forvalues i = 2/23 {
	drop ehc19i`i'
}
forvalues p = 1/4 {
	drop igr51p`p'i2-igr51p`p'i6
}
* net19 to net25 have items up to i15, net26 to net30 up to i16
forvalues q = 19/25 {
	drop net`q'i2-net`q'i15
}
forvalues q = 26/30 {
	drop net`q'i2-net`q'i16
}
* NOTE(review): capture drop with several names is all-or-nothing - if one of
* the five names is absent the command fails silently and none is dropped.
* Items i2-i8 of crn7k are already gone at this point, so only i9-i11 can be
* affected; confirm that they are removed as intended.
forvalues i = 2/11 {
	capture drop crn7k1i`i' crn7k2i`i' crn7k3i`i' crn7k4i`i' crn7k5i`i'
}
forvalues i = 2/11 {
	capture drop crn7k6i`i' crn7k7i`i' crn7k8i`i' crn7k9i`i' crn7k10i`i'
}

* Variable ranges that make up the CAPI part; the ranges rely on the variable
* order of the anchor file.
* NOTE(review): wave 2 starts this range at val2i1, this wave at val2i2 -
* confirm that val2i1 is absent or deliberately left out here.
local capi_items val2i2-pa28 bce2i1-cps5

* CAPI missings per person:
*   overall (codes -1, -2, -7), don't know (-1), refusal (-2)
* Code -7 is not explained in this file - TODO confirm its meaning; it enters
* the overall count but neither of the two splits.
egen missings_w6_CAPI   = anycount(`capi_items'), values(-1, -2, -7)
egen missings_w6_CAPI_1 = anycount(`capi_items'), values(-1)
egen missings_w6_CAPI_2 = anycount(`capi_items'), values(-2)

* Nonresponse on monthly household income (inc13): overall / don't know / refusal
tab inc13
egen missings_w6_Eink   = anycount(inc13), values(-1, -2)
egen missings_w6_Eink_1 = anycount(inc13), values(-1)
egen missings_w6_Eink_2 = anycount(inc13), values(-2)
* rows with inc13 system missing or coded -10 get a missing flag instead of 0
* NOTE(review): waves 1-3 blank out codes -3 and -10 at this step; confirm
* that -3 cannot occur in inc13 in this wave.
foreach flag of varlist missings_w6_Eink missings_w6_Eink_1 missings_w6_Eink_2 {
	replace `flag' = . if inlist(inc13, ., -10)
}

* Number of questions asked = non-missing items minus items coded -3
* (-3 is presumably "does not apply" - TODO confirm against the codebook)
egen varzahl_w6_CAPI = rownonmiss(`capi_items')	// all CAPI questions
egen varzahl_w6_CAPI_miss = anycount(`capi_items'), values(-3)
replace varzahl_w6_CAPI = varzahl_w6_CAPI - varzahl_w6_CAPI_miss
tab varzahl_w6_CAPI, m

* Same denominator, but only for the items up to the income question
local income_items val2i2-inc13
egen varzahl_w6 = rownonmiss(`income_items')
egen varzahl_w6_miss = anycount(`income_items'), values(-3)
replace varzahl_w6 = varzahl_w6 - varzahl_w6_miss
tab varzahl_w6 wave, m

keep id wave demodiff d0 sex_gen doby_gen age cohort intsex intage intid intcont intdur varzahl* missings_*

*****************************************************************************

** W7
** Wave 7 is appended to the reduced file of the earlier waves. The wave-7
** counters below are computed for every row in memory; only the rows with
** wave==7 are used when the waves are combined further down in this file.

append using "$w7data\anchor7.dta"

drop if demodiff == 1

* delete questions for which it was not possible to "not" answer
* (kept as ONE drop command: the m1 pattern also matches the m10 names, so the
*  patterns have to be expanded together)
drop ehc*m1* ehc*m2* ehc*m3* ehc*m4* ehc*m5* ehc*m6* ehc*m7* ehc*m8* ehc*m9* ehc*m10* ehcid

** delete item batteries, so they only count as one question
** (on a "don't know" / refusal all items of a battery carry -1 / -2)
foreach battery in srs1i2-srs1i5 sd32i2-sd32i5 sd32i6-sd32i12 sd32i13 ///
		sdp2i2-sdp2i12 sdp9i2-sdp9i7 {
	drop `battery'
}
* sdp10 items are dropped by name, one at a time, rather than as a range
forvalues i = 2/22 {
	drop sdp10i`i'
}
foreach battery in job5i2-job5i5 job6i2-job6i4 sex6i2-sex6i11 {
	drop `battery'
}
* frt13 items are removed one by one under capture, so a missing item is skipped
forvalues i = 2/16 {
	capture drop frt13i`i'
}
foreach battery in frt4i2-frt4i7 inc10i2-inc10i12 {
	drop `battery'
}
drop crn2k*i2 crn2k*i3
* batteries repeated per k-index: items 2 and up are removed for every k
foreach stem in crn13k crn14k {
	forvalues i = 2/14 {
		drop `stem'*i`i'
	}
}
drop hcp3m inc22i2
foreach battery in igr82i2-igr82i12 igr85i2-igr85i12 {
	drop `battery'
}
forvalues i = 2/8 {
	drop crn7k*i`i'
}
drop inc29i2 inc29i3 hc26h2m
drop sin8i2-sin8i4
* forvalues replaces the out-of-date "for num" prefix throughout this section;
* the drops and their order are unchanged
forvalues i = 2/23 {
	drop ehc19i`i'
}
drop crn61i2-crn61i5
forvalues p = 1/4 {
	forvalues i = 2/22 {
		drop sib9p`p'i`i'
	}
}
drop hcp4m job16m job18m
drop hlt11i2-hlt11i3
forvalues i = 2/8 {
	drop crn59k1i`i' crn59k2i`i' crn59k3i`i' crn59k4i`i' crn59k5i`i'
}
forvalues i = 2/8 {
	drop crn59k6i`i' crn59k7i`i' crn59k8i`i' crn59k9i`i' crn59k10i`i'
}
* crn55: for each k (1 to 10) drop the m2 to m14 entries of items i1 and i2
forvalues k = 1/10 {
	forvalues m = 2/14 {
		drop crn55k`k'i1m`m' crn55k`k'i2m`m'
	}
}
* NOTE(review): capture drop with several names is all-or-nothing - if one of
* the five names is absent the command fails silently and none is dropped.
* Items i2-i8 of crn7k are already gone at this point, so only i9-i11 can be
* affected; confirm that they are removed as intended.
forvalues i = 2/11 {
	capture drop crn7k1i`i' crn7k2i`i' crn7k3i`i' crn7k4i`i' crn7k5i`i'
}
forvalues i = 2/11 {
	capture drop crn7k6i`i' crn7k7i`i' crn7k8i`i' crn7k9i`i' crn7k10i`i'
}

* Variable ranges that make up the CAPI part; the ranges rely on the variable
* order of the anchor file.
local capi_items val1i1-pa35i2 frt5-cps5

* CAPI missings per person:
*   overall (codes -1, -2, -7), don't know (-1), refusal (-2)
* Code -7 is not explained in this file - TODO confirm its meaning; it enters
* the overall count but neither of the two splits.
egen missings_w7_CAPI   = anycount(`capi_items'), values(-1, -2, -7)
egen missings_w7_CAPI_1 = anycount(`capi_items'), values(-1)
egen missings_w7_CAPI_2 = anycount(`capi_items'), values(-2)

* Nonresponse on monthly household income (inc13): overall / don't know / refusal
tab inc13
egen missings_w7_Eink   = anycount(inc13), values(-1, -2)
egen missings_w7_Eink_1 = anycount(inc13), values(-1)
egen missings_w7_Eink_2 = anycount(inc13), values(-2)
* rows with inc13 system missing or coded -10 get a missing flag instead of 0
* NOTE(review): waves 1-3 blank out codes -3 and -10 at this step; confirm
* that -3 cannot occur in inc13 in this wave.
foreach flag of varlist missings_w7_Eink missings_w7_Eink_1 missings_w7_Eink_2 {
	replace `flag' = . if inlist(inc13, ., -10)
}

* Number of questions asked = non-missing items minus items coded -3
* (-3 is presumably "does not apply" - TODO confirm against the codebook)
egen varzahl_w7_CAPI = rownonmiss(`capi_items')	// all CAPI questions
egen varzahl_w7_CAPI_miss = anycount(`capi_items'), values(-3)
replace varzahl_w7_CAPI = varzahl_w7_CAPI - varzahl_w7_CAPI_miss
tab varzahl_w7_CAPI, m

* Same denominator, but only for the items up to the income question
local income_items val1i1-inc13
egen varzahl_w7 = rownonmiss(`income_items')
egen varzahl_w7_miss = anycount(`income_items'), values(-3)
replace varzahl_w7 = varzahl_w7 - varzahl_w7_miss
tab varzahl_w7 wave, m

keep id wave demodiff d0 sex_gen doby_gen age cohort intsex intage intid intcont intdur varzahl* missings_*

*****************************************************************************

** combine infos of all waves:
** Every wave stored its counters in wave-specific columns. Here each row takes
** the value from the column that belongs to its own wave, the item nonresponse
** rates are derived, and the wave-specific helper columns are removed.

sort id wave

* CAPI nonresponse counts: overall, don't know (_1), refusal (_2)
foreach sfx in CAPI CAPI_1 CAPI_2 {
	gen missings_`sfx' = .
}
forvalues w = 1/7 {
	foreach sfx in CAPI CAPI_1 CAPI_2 {
		replace missings_`sfx' = missings_w`w'_`sfx' if wave == `w'
	}
}

* number of questions asked: all CAPI items / items up to the income question
gen varzahl_CAPI = .
gen varzahl = .
forvalues w = 1/7 {
	replace varzahl_CAPI = varzahl_w`w'_CAPI if wave == `w'
	replace varzahl = varzahl_w`w' if wave == `w'
}

* item nonresponse as a percentage of the CAPI questions asked
foreach sfx in CAPI CAPI_1 CAPI_2 {
	gen missingsPC_`sfx' = (missings_`sfx'/varzahl_CAPI)*100
}

* multinomial (discarding changes from -1 to -2 and vice versa):
* the other nonresponse type is taken out of the denominator
gen missingsPC_CAPI_1_n = (missings_CAPI_1/(varzahl_CAPI-missings_CAPI_2))*100
gen missingsPC_CAPI_2_n = (missings_CAPI_2/(varzahl_CAPI-missings_CAPI_1))*100

* income nonresponse flags: overall, don't know (_1), refusal (_2)
foreach sfx in Eink Eink_1 Eink_2 {
	gen missings_`sfx' = .
}
forvalues w = 1/7 {
	foreach sfx in Eink Eink_1 Eink_2 {
		replace missings_`sfx' = missings_w`w'_`sfx' if wave == `w'
	}
}

* multinomial (discarding changes from -1 to -2 and vice versa)
* missings_Eink_3 is only tabulated as a check and dropped again below:
*   1 = don't know, 2 = refusal, 3 = no INR (baseline)
gen missings_Eink_3 = missings_Eink_1
replace missings_Eink_3 = 2 if missings_Eink_2 == 1
replace missings_Eink_3 = 3 if missings_Eink_3 == 0
tab missings_Eink_3, m
* _n versions: each flag is left missing where the other type is flagged
gen missings_Eink_1_n = missings_Eink_1 if missings_Eink_2 != 1
gen missings_Eink_2_n = missings_Eink_2 if missings_Eink_1 != 1

drop missings_w*_Eink* missings_w*_CAPI* varzahl_w* missings_Eink_3

save "$Intwechsel\3_nonres_1.dta", replace

*****************************************************************************


***********************************************
** finalize nonresponse-interviewer data set **
***********************************************
** Adds the interviewer-change indicator, the interviewer's number of
** interviews (intnum) and the interviewer's exit year to the person-wave file.

use "$Intwechsel\3_nonres_1.dta", clear

* quick checks of the stacked file
tab wave, m
tab demodiff
tab intid, m
drop if wave == -10

** add interviewer change:
* person-level file with one intchangeW# column per wave; only persons found
* in both files are kept
merge m:1 id using "$Intwechsel\1_int.dta", keep(match) nogenerate

* pick the indicator that belongs to the wave of each row
gen intchange = .
forvalues w = 1/7 {
	replace intchange = intchangeW`w' if wave == `w'
}

drop intchangeW?

** add interviewer experience and exit year
* NOTE(review): both merges below retain unmatched rows from the using files
* (nothing is filtered on the merge result) - confirm that this is intended.

* 1) number of interviews till respective interview
merge 1:1 id wave using "$Intwechsel\0_interviewer_1.dta", keepusing(intnum) nogenerate
*br id wave intid intnum intchange

* 2) exit year of interviewer
merge m:1 intid using "$Intwechsel\0_interviewer_2.dta", keepusing(exityear) nogenerate

save "$Intwechsel\3_help.dta", replace

*****************************************************************************

** merge "respondent moving" variable:
** Adds per-wave move indicators and residential distances from the moving
** file and derives long-format move, resdis and three move-type dummies.

use "$Intwechsel\3_help.dta", clear

merge m:1 id using "$Intwechsel\2_move.dta"

* inspect the merge result before anything is dropped
tab _merge
unique id if _merge==1  // 2815: master only
list id demodiff if _merge==1
unique id if _merge==3  // 9587

* side check on a temporary copy: are the master-only persons present in the
* wave-7 biomob_ehc file? (the data in memory are restored afterwards)
preserve
	keep if _merge==1 // not in the moving data set
	drop _merge
	bysort id: keep if _n==_N
	merge 1:m id using "$w7data\biomob_ehc.dta"
	tab _merge // no case merged --> these cases not in biomob_ehc --> did not move
	list id if _merge==3
restore

drop if _merge==2
drop _merge

* move: 0 in wave 1, 1 where the wave-specific move flag is set, else 0
gen move = 0 if wave == 1
forvalues w = 2/7 {
	replace move = 1 if wave == `w' & move_w`w' == 1
}
tab move wave, m
replace move = 0 if move == .

* resdis: distance of the move taken from the wave-specific column
gen resdis = .
forvalues w = 2/7 {
	replace resdis = resdis_w`w' if wave == `w' & move_w`w' == 1
}
replace resdis = . if inlist(resdis, -99, -98, -2) // 221 missings
* rows flagged in move_within_w# get distance 0 where it is still missing
forvalues w = 2/7 {
	replace resdis = 0 if resdis == . & wave == `w' & move_within_w`w' == 1
}

tab move, m
drop move_w*

tab resdis, m
tab move if resdis!=., m
tab resdis if move==1, m  // 263 missing distances (of 6,112 moves)

** move of more than 100 km
gen move_dist = (move==1 & resdis>100 & resdis!=.)

** move of less than 100 km (also within city)
gen move_close = (move==1 & resdis<=100)

** move within city
gen move_city = (move==1 & resdis==0)

tab move_dist move_close, m
tab move_dist move, m

count // 53,447

tab move intchange, m col
* 25.1 % of observations with interviewer change did move residence (within or between cities)

tab intchange if move==1 & resdis==0, m
* 11 % of observations with move within the same city (resdis=0) experienced interviewer change

tab intchange if move==1 & resdis!=0, m
* 27 % of observations with resdis not 0 experienced interviewer change

tab intchange if move_dist==1, m
* 50 % of observations with move>100 km experienced interviewer change

tab intchange if move_close==1, m
* 14 % of observations with move<=100 km experienced interviewer change

tab intchange if move_city==1, m
* 11 % of observations with move within same city (resdis=0) experienced interviewer change

sort id wave
drop resdis_w* between_w*

save "$Intwechsel\3_nonres_2.dta", replace




